/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/28/2022
Last Modified on: 2/17/2024

Description: This program generates the investigator-level placement and outcome
rates, as well as standard errors for these estimates.

The program takes as an input a child by investigation level dataset spanning 
January 2008 to December 2019, subject to the sample restrictions discussed in the paper.
 
Using these child by investigation level datasets, the program then estimates
investigator-specific rates and standard errors. Relative to our baseline estimates, 
the difference here is that we do not account for randomization strata.  

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

**************************
**(0) SETUP
**************************
* Start from a clean session: empty data in memory, no output paging,
* no log left open from an earlier run.
clear
set more off
* The keyword for clearing every global macro is _all (with the underscore).
* Writing "all" would only target a global that happens to be named all,
* leaving globals from earlier runs in place.
macro drop _all
capture log close
* Fix the random-number seed so the run is reproducible.
set seed 02042023

*Set directories 
* (paths were removed for confidentiality; fill these in before running.
*  cleand is used as a filename prefix below, so it should end with a
*  directory separator.)
global cleand 
global tmpd
global output 

**************************
**(1) USING THE MAIN ANALYSIS SAMPLE FROM 2008 TO 2019, COMPUTE RATES AND STANDARD ERRORS 
**************************

***********
**(1A) COMPUTE INVESTIGATOR RATES
***********
* Load the child-by-investigation analysis sample and keep only the
* variables referenced in this program.
use "${cleand}analysis_sample_investigators_qje.dta", clear 

keep worker_id rotationgroup pre_black fc inv6m count_inv bshare zipcode_vic cps_year vicid

* Race indicators. white is 1 only when black equals 0, so an observation
* with missing black ends up with black missing and white equal to 0.
rename pre_black black 
gen white = black==0 

**Estimate investigator placement and subsequent maltreatment rates
* nofc is 1 when fc equals 0 and 0 otherwise (including when fc is missing).
gen nofc=fc==0

* Consecutive integer id for each investigator, used as the factor variable
* in the regressions below.
egen grpworker_id = group(worker_id)
* NOTE(review): cell is not referenced again in this program; it is kept
* here only so that the data in memory is unchanged.
egen cell=group(zipcode_vic cps_year)

* Store the list of investigator ids directly in the local macro levels.
* Using the local() option avoids routing a potentially very long list
* through a string expression.
levelsof grpworker_id, local(levels)

preserve
* For each outcome, estimate investigator-specific rates: pooled (base_),
* white children (w_) and black children (b_).
foreach outcome in nofc inv6m {
	* The placement outcome uses every observation; any other outcome is
	* only used for children who were left at home (nofc equal to 1).
	local insample
	if "`outcome'" != "nofc" {
		local insample "if nofc == 1"
	}

	* ---- Pooled rate: outcome on investigator indicators ----
	quietly reghdfe `outcome' i.grpworker_id `insample', resid noabsorb

	* Mean gap between the two predictions; it is added to every
	* linear combination computed below.
	predict xbd, xbd
	predict xb, xb
	generate d = xbd - xb
	summarize d
	local fe_mean = r(mean)

	generate base_`outcome' = .

	quietly {
		foreach w of local levels {
			* Best effort: when lincom fails for an investigator the rate
			* is simply left missing.
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace base_`outcome' = r(estimate) if grpworker_id == `w'
			}
		}
	}
	drop xbd xb d

	* ---- Race-specific rates: add investigator-by-black interactions ----
	quietly {
		* x is the outcome restricted to children flagged black or white
		generate x = `outcome' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black `insample', noabsorb resid
	}

	predict xbd, xbd
	predict xb, xb
	generate d = xbd - xb
	summarize d
	local fe_mean = r(mean)

	generate b_`outcome' = .
	generate w_`outcome' = .

	quietly {
		foreach w of local levels {
			* White children: constant plus investigator effect
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_`outcome' = r(estimate) if grpworker_id == `w'
			}
			* Black children: additionally include the interaction term
			capture lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_`outcome' = r(estimate) if grpworker_id == `w'
			}
		}
	}
	drop x xbd xb d
}

*----------------------------------------------------------------------*
* Collapse to one row per investigator and save the rates
*----------------------------------------------------------------------*
* Recompute caseload counts from the data currently in memory.
capture drop count_inv
bysort worker_id: generate count_inv = _N
bysort worker_id: egen count_black = total(black)
bysort worker_id: egen count_white = total(white)

* Retain identifiers, counts and the estimated rates only.
keep worker_id grpworker_id count_inv count_black count_white base_* b_* w_*
duplicates drop worker_id, force
drop if worker_id == .

save "${cleand}inv_unadjusted_rates_qje.dta", replace
restore

***********
**(1B) COMPUTE STANDARD ERRORS FOR INVESTIGATOR RATES
***********
preserve
* For each outcome, re-estimate the race-specific regression and store the
* standard error of each investigator's white (w_se_) and black (b_se_) rate.
foreach outcome in nofc inv6m {
	* The placement outcome uses every observation; any other outcome is
	* only used for children who were left at home (nofc equal to 1).
	local insample
	if "`outcome'" != "nofc" {
		local insample "if nofc == 1"
	}

	quietly {
		* x is the outcome restricted to children flagged black or white
		generate x = `outcome' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black `insample', noabsorb resid
	}

	* Mean gap between the two predictions; it is added to every
	* linear combination computed below.
	predict xbd, xbd
	predict xb, xb
	generate d = xbd - xb
	summarize d
	local fe_mean = r(mean)

	generate b_se_`outcome' = .
	generate w_se_`outcome' = .

	quietly {
		foreach w of local levels {
			* White children. Best effort: a failed lincom leaves the
			* standard error missing for that investigator.
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_se_`outcome' = r(se) if grpworker_id == `w'
			}
			* Black children: additionally include the interaction term
			capture lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_se_`outcome' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop x xbd xb d
}

*----------------------------------------------------------------------*
* Collapse to one row per investigator and save the standard errors
*----------------------------------------------------------------------*
* Only the b_ and w_ prefixed variables and the investigator id are retained.
keep worker_id b_* w_*
duplicates drop worker_id, force
drop if worker_id == .

save "${cleand}inv_unadjusted_se_qje.dta", replace
restore

***********
**(1C) COMPUTE STANDARD ERRORS FOR INVESTIGATOR RATES, CLUSTERED BY CHILD AND INVESTIGATOR
***********
* For each outcome, re-estimate the race-specific regression with standard
* errors clustered on grpworker_id and vicid, and store the standard error of
* each investigator's white (w_twse_) and black (b_twse_) rate.
* This section is not wrapped in preserve/restore: the data in memory is
* reduced to one row per investigator and then replaced by the next use.
foreach outcome in nofc inv6m {
	* The placement outcome uses every observation; any other outcome is
	* only used for children who were left at home (nofc equal to 1).
	local insample
	if "`outcome'" != "nofc" {
		local insample "if nofc == 1"
	}

	quietly {
		* x is the outcome restricted to children flagged black or white
		generate x = `outcome' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black `insample', noabsorb resid cluster(grpworker_id vicid)
	}

	* Mean gap between the two predictions; it is added to every
	* linear combination computed below.
	predict xbd, xbd
	predict xb, xb
	generate d = xbd - xb
	summarize d
	local fe_mean = r(mean)

	generate b_twse_`outcome' = .
	generate w_twse_`outcome' = .

	quietly {
		foreach w of local levels {
			* White children. Best effort: a failed lincom leaves the
			* standard error missing for that investigator.
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_twse_`outcome' = r(se) if grpworker_id == `w'
			}
			* Black children: additionally include the interaction term
			capture lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_twse_`outcome' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop x xbd xb d
}

*----------------------------------------------------------------------*
* Collapse to one row per investigator and save the clustered SEs
*----------------------------------------------------------------------*
* Only the b_ and w_ prefixed variables and the investigator id are retained.
keep worker_id b_* w_*
duplicates drop worker_id, force
drop if worker_id == .

save "${cleand}inv_unadjusted_twse_qje.dta", replace


***********
**(2) MERGE DATASETS AND SAVE FINAL INVESTIGATOR-LEVEL DATASET
***********
* Start from the investigator-level rates and attach the two sets of
* standard errors, matching one row per worker_id in each file.
use "${cleand}inv_unadjusted_rates_qje.dta", clear 
cap drop _merge 
merge 1:1 worker_id using "${cleand}inv_unadjusted_se_qje.dta", keepus(b_se_* w_se_*)
cap drop _merge 
merge 1:1 worker_id using "${cleand}inv_unadjusted_twse_qje.dta", keepus(b_twse_* w_twse_*)
* The _merge variable from the second merge is left in the saved dataset,
* as before.

*Gen additional variables needed for the main analysis 
* Share of each investigator's cases that involve black children, and its
* caseload-weighted mean across investigators.
gen share_black = count_black/count_inv 
* NOTE(review): wtmean is presumably a user-written egen function; confirm
* that it is installed before running.
egen bshare = wtmean(share_black), weight(count_inv)

* Restrict the estimated race-specific rates to the unit interval.
* Stata treats missing values as larger than any number, so the upper bound
* must exclude missings explicitly; otherwise an investigator whose rate
* could not be estimated would be assigned a rate of 1.
foreach x in w_nofc b_nofc w_inv6m b_inv6m {
	replace `x'=0 if `x'<0 
	replace `x'=1 if `x'>1 & !missing(`x')
}

save "${cleand}inv_unadjusted_rates_se_qje.dta", replace 
